# Florian Oswald, Sciences Po, 2018
# Univariate maximization with Optim.jl: Brent vs. golden-section search.
using Plots
using Optim
plotlyjs()
# test function on the bracket [0, 2]
f(x) = exp(x) - x^4
# Optim minimizes, so we maximize f by minimizing its negative
minf(x) = -f(x)
# Brent's method on the bracket [0, 2]
brent = optimize(minf,0,2,Brent())
# golden-section search on the same bracket, for comparison
golden = optimize(minf,0,2,GoldenSection())
println("brent = $brent")
println("golden = $golden")
# plot the (original, un-negated) objective over the bracket
plot(f,0,2)
# Roots.jl and IntervalRootFinding.jl
using Roots
# find the zeros of this function:
# Root finding: the same f has two real roots, found by bracketing.
f(x) = exp(x) - x^4
## bracketing
# fzero finds a root inside a bracketing interval [a, b]
fzero(f, 8, 9) # 8.613169456441398
fzero(f, -10, 0) # -0.8155534188089606
using IntervalRootFinding, IntervalArithmetic
# an interval from -10 to 10 (IntervalArithmetic's `..` constructor)
-10..10
# a 2-D box: [1,3] x [2,4]
X = IntervalBox(1..3, 2..4)
# @interval builds validated enclosures; arithmetic propagates the bounds
a = @interval(0.1, 0.3)
b = @interval(0.3, 0.6)
a + b
# guaranteed enclosures of all roots of x^2 - 2 in [-10, 10] via bisection
rts = IntervalRootFinding.roots(x->x^2 - 2, -10..10, Bisection)
using Optim
using OptimTestProblems
# list the names of all bundled unconstrained test problems
for (name, prob) in MultivariateProblems.UnconstrainedProblems.examples
println(name)
end
rosenbrock = MultivariateProblems.UnconstrainedProblems.examples["Rosenbrock"]
# grid search on rosenbrock
# evaluate f on a 2-D lattice of points and keep the smallest value
grid = collect(-1.0:0.1:3);
grid2D = [[i;j] for i in grid,j in grid];
val2D = map(rosenbrock.f,grid2D);
# findmin returns (minimum value, index of the minimizer)
r = findmin(val2D);
println("grid search results in minimizer = $(grid2D[r[2]])")
# All descent methods follow more or less this structure at iteration $k$.
# https://github.com/JuliaNLSolvers/LineSearches.jl
using LineSearches
# Newton's method combined with the Hager-Zhang line search
algo_hz = Newton(linesearch = HagerZhang())
# needs objective, gradient (g!), Hessian (h!) and a start point
res_hz = Optim.optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, rosenbrock.initial_x, method=algo_hz)
# Optim.jl has a TrustRegion for Newton (see below for Newton's Method)
# constructor shown with its tuning parameters; the object here is built
# and discarded purely for illustration
NewtonTrustRegion(; initial_delta = 1.0, # The starting trust region radius
delta_hat = 100.0, # The largest allowable trust region radius
eta = 0.1, #When rho is at least eta, accept the step.
rho_lower = 0.25, # When rho is less than rho_lower, shrink the trust region.
rho_upper = 0.75) # When rho is greater than rho_upper, grow the trust region (though no greater than delta_hat).
res = Optim.optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, rosenbrock.initial_x, method=NewtonTrustRegion())
# Optim.jl again
# GradientDescent constructor with its default options (constructed and
# discarded here for illustration)
GradientDescent(; alphaguess = LineSearches.InitialPrevious(),
linesearch = LineSearches.HagerZhang(),
P = nothing,
precondprep = (P, x) -> nothing)
# there is a dedicated LineSearch package: https://github.com/JuliaNLSolvers/LineSearches.jl
# gradient descent with the default iteration limit, then 5_000 and 50_000
# iterations — comparing how far each run gets on Rosenbrock
GD = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent())
GD1 = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent(),Optim.Options(iterations=5000))
GD2 = optimize(rosenbrock.f, rosenbrock.g!,[0.0, 0.0],GradientDescent(),Optim.Options(iterations=50000))
println("gradient descent = $GD")
println("\n")
println("gradient descent 2 = $GD1")
println("\n")
println("gradient descent 3 = $GD2")
# second-order methods: Newton (uses the Hessian h!), then BFGS and
# low-memory L-BFGS, which build curvature information from gradients
optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [0.0, 0.0], Newton(),Optim.Options(show_trace=true))
@show optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [-1.0, 3.0], BFGS());
# low memory BFGS
@show optimize(rosenbrock.f, rosenbrock.g!, rosenbrock.h!, [0.0, 0.0], LBFGS());
# basis(i, n): coordinate unit vector e_i of length n — 1.0 in slot i,
# 0.0 everywhere else; used to index each search direction.
basis(i, n) = [Float64(k == i) for k in 1:n]
# Cyclic coordinate descent: sweep over the n coordinate directions,
# line-searching along each in turn, until an entire sweep moves the
# iterate by less than ε.
# NOTE(review): `line_search(f, x, d)` is not defined anywhere in this
# file, so this snippet cannot run as-is — confirm where it comes from.
# NOTE(review): `norm` requires `using LinearAlgebra` on Julia ≥ 1.0.
function cyclic_coordinate_descent(f, x, ε)
Δ, n = Inf, length(x)
while abs(Δ) > ε
x′ = copy(x)
for i in 1 : n
d = basis(i, n)
x = line_search(f, x, d)
end
Δ = norm(x - x′)
end
return x
end
# Generalized pattern search: try each direction d in D scaled by step α;
# on the first improving direction, move there and promote that direction
# to the front of D (opportunistic ordering); if no direction improves,
# shrink α by factor γ. Stops once α ≤ ε and returns the final iterate.
# Prints the total number of objective evaluations as a side effect.
# Fixed: `unshift!` was renamed `pushfirst!` in Julia 1.0.
function generalized_pattern_search(f, x, α, D, ε, γ=0.5)
    y, n = f(x), length(x)
    evals = 0                      # objective-evaluation counter
    while α > ε
        improved = false
        for (i,d) in enumerate(D)
            x′ = x + α*d
            y′ = f(x′)
            evals += 1
            if y′ < y
                x, y, improved = x′, y′, true
                # move the successful direction to the front of D so it
                # is tried first on the next pass
                D = pushfirst!(deleteat!(D, i), d)
                break
            end
        end
        if !improved
            α *= γ                 # no direction improved: shrink the step
        end
    end
    println("$evals evaluations")
    return x
end
# candidate direction set including a negative direction
D = [[1,0],[0,1],[-1,-0.5]]
# NOTE(review): this immediately overwrites the previous D — with only the
# two positive coordinate directions the search can never decrease a
# coordinate; confirm which direction set was intended.
D = [[1,0],[0,1]]
y=generalized_pattern_search(rosenbrock.f,zeros(2),0.8,D,1e-6 )
# MATLAB's fmincon and fminsearch implement it.
optimize(rosenbrock.f, [0.0, 0.0], NelderMead())
Lagarias et al. (SIOPT, 1999): At present there is no function in any dimension greater than one, for which the original Nelder-Mead algorithm has been proved to converge to a minimizer.
Given all the known inefficiencies and failures of the Nelder-Mead algorithm [...], one might wonder why it is used at all, let alone why it is so extraordinarily popular.
$$\mathbf{x}^{(k+1)} \longleftarrow \mathbf{x}^{(k)} +\alpha^k\mathbf{g}^{(k)} + \mathbf{\varepsilon}^{(k)}$$
# f: function
# x: initial point
# T: transition distribution
# t: temperature schedule, k_max: max number of iterations
# Simulated annealing: propose x + rand(T) each iteration, always accept
# downhill moves, and accept uphill moves with probability exp(-Δ/t(k)).
# Returns the best point seen and a per-iteration trace of the best
# objective value.
function simulated_annealing(f, x, T, t, k_max)
    y = f(x)
    # one slot per iteration, recording the best value found so far
    ytrace = zeros(typeof(y), k_max)
    x_best, y_best = x, y
    for k in 1:k_max
        candidate = x + rand(T)      # random perturbation of the current point
        y_cand = f(candidate)
        gap = y_cand - y
        # Metropolis acceptance rule (rand() only drawn for uphill moves)
        if gap ≤ 0 || rand() < exp(-gap / t(k))
            x, y = candidate, y_cand
        end
        # best-seen bookkeeping considers the candidate even when rejected
        if y_cand < y_best
            x_best, y_best = candidate, y_cand
        end
        ytrace[k] = y_best
    end
    return x_best, ytrace
end
# Ackley test function (d-dimensional); global minimum f(0) = 0 at the
# origin. `a`, `b`, `c` are the standard shape parameters.
# Fixed: `2Ï€` was mojibake for `2π`, and the Julia 0.6 global `e`
# (removed in Julia 1.0) is replaced by `exp(1)`.
function ackley(x, a=20, b=0.2, c=2π)
    d = length(x)
    return -a*exp(-b*sqrt(sum(x.^2)/d)) - exp(sum(cos(c*xi) for xi in x)/d) + a + exp(1)
end
using Plots
plotlyjs()
# surface plot of the 2-D Ackley function on [-30, 30]^2
surface(-30:0.1:30,-30:0.1:30,(x,y)->ackley([x, y]))
# Experiment: 300 simulated-annealing runs on Ackley for each combination
# of proposal spread `sig` and temperature scale `t1`.
using Distributions
d = Dict()
for sig in (1,5,25), t1 in (1,10,25)
# 300 independent runs of 100 iterations, all starting from [15, 15];
# proposals are MvNormal draws, temperature decays like t1/k
tmp = [simulated_annealing(ackley,[15,15],MvNormal(2,sig),x->t1/x,100) for i in 1:300]
d[(sig,t1)] = Dict()
# objective value at each run's best point (tmp[i][1])
# NOTE(review): `mapslices(f, A, [1])` is Julia 0.6 syntax; Julia ≥ 0.7
# requires the keyword form `dims = 1` — confirm the target Julia version.
d[(sig,t1)][:y] = mapslices(x->ackley(x),hcat([tmp[i][1] for i in 1:300]...),[1])
# trace of best-so-far values (tmp[i][2]), one column per run
d[(sig,t1)][:ytrace] = hcat([tmp[i][2] for i in 1:300]...)
end
d
# x=[simulated_annealing(ackley,[15,15],MvNormal(2,1),x->1.0/x,100) for i in 1:100]
# y=[simulated_annealing(ackley,[15,15],MvNormal(2,5),x->10.0/x,100) for i in 1:100]
# map((x)->ackley([x[1],x[2] ]),y)
Recall our core optimization problem:
$$ \min_{x\in\mathbb{R}^n} f(x) \text{ s.t. } x \in \mathcal{X} $$where both $f$ and $h$ have continuous partial derivatives.
In other words, we want to find the best $x$ s.t. $h(x) = 0$ and we have
$$ \nabla f(x) = \lambda \nabla h(x) $$ for some Lagrange multiplier $\lambda$
Suppose we have
$$ \begin{aligned} \min_x & -\exp\left( -\left( x_1 x_2 - \frac{3}{2} \right)^2 - \left(x_2 - \frac{3}{2}\right)^2 \right) \\ \text{subject to } & x_1 - x_2^2 = 0 \end{aligned} $$ We form the Lagrangian:
$$ \mathcal{L}(x_1,x_2,\lambda) = -\exp\left( -\left( x_1 x_2 - \frac{3}{2} \right)^2 - \left(x_2 - \frac{3}{2}\right)^2 \right) - \lambda(x_1 - x_2^2) $$ Then we compute the gradient with respect to $x_1,x_2,\lambda$, set it to zero and solve.
gr()
# contours of a smooth objective with the constraint curve x2 = sqrt(x1)
# overlaid; the red marker is the hard-coded (approximate) constrained optimum
f(x1,x2) = -exp.(-(x1.*x2 - 3/2).^2 - (x2-3/2).^2)
c(x1) = sqrt(x1)
x=0:0.01:3.5
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c,0.01,3.5,label="",lw=2,color=:black)
scatter!([1.358],[1.165],markersize=5,markercolor=:red,label="Constr. Optimum")
Suppose now we had
$$ \begin{aligned} \min_\mathbf{x} & f(\mathbf{x}) \\ \text{subject to } & g(\mathbf{x}) \leq 0 \end{aligned} $$which, if the solution lies right on the constraint boundary, means that
$$ \nabla f - \mu \nabla g = 0 $$for some scalar $\mu$ - as before.
# same contours, now shading one side of the constraint curve
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c,0.01,3.5,label="",lw=2,color=:black,fill=[0],fillalpha=0.5,fillcolor=:blue)
scatter!([1.358],[1.165],markersize=5,markercolor=:red,label="Constr. Optimum")
# shifting the constraint boundary up (c2) leaves the unconstrained
# optimum at (1, 1.5) feasible
c2(x1) = 1+sqrt(x1)
contour(x,x,(x,y)->f(x,y),lw=1.5,levels=[collect(0:-0.1:-0.85)...,-0.887,-0.95,-1])
plot!(c2,0.01,3.5,label="",lw=2,color=:black,fill=[0],fillalpha=0.5,fillcolor=:blue)
scatter!([1],[1.5],markersize=5,markercolor=:red,label="Unconstr. Optimum")
The preceding four conditions are called the Karush-Kuhn-Tucker (KKT) conditions. In the above order, and in general terms, they are:
The KKT conditions are the FONCs for problems with smooth constraints.
We can combine equality and inequality constraints:
$$ \mathcal{L}(\mathbf{x},\mathbf{\lambda},\mathbf{\mu}) = f(\mathbf{x}) + \sum_{i} \lambda_i h_i(\mathbf{x}) + \sum_j \mu_j g_j(\mathbf{x}) $$where, notice, we reverted the sign of $\lambda$ since this is unrestricted.
As one approaches the constraint boundary, the barrier function goes to infinity. Properties:
$p_\text{barrier}(\mathbf{x})$ is continuous
NLopt.jl: NLopt expects constraints always to be formulated in the format
$$ g(x) \leq 0 $$
where $g$ is your constraint function.
using NLopt
# objective for NLopt: minimize sqrt(x[2]). NLopt passes `grad` to be
# filled in-place when non-empty (gradient-based algorithms) and uses the
# returned value as f(x).
count = 0 # keep track of # function evaluations
function myfunc(x::Vector, grad::Vector)
if length(grad) > 0
grad[1] = 0
grad[2] = 0.5/sqrt(x[2])
end
# bump the global evaluation counter and log the call
global count
count::Int += 1
println("f_$count($x)")
sqrt(x[2])
end
# Inequality constraint (a*x[1] + b)^3 - x[2] ≤ 0 in NLopt form: fill
# `grad` in-place when a gradient-based algorithm supplies it, and return
# the constraint value.
function myconstraint(x::Vector, grad::Vector, a, b)
    if !isempty(grad)
        inner = a*x[1] + b
        grad[1] = 3a * inner^2
        grad[2] = -1
    end
    return (a*x[1] + b)^3 - x[2]
end
# MMA (method of moving asymptotes), a gradient-based algorithm, in 2-D
opt = Opt(:LD_MMA, 2)
# x[2] must stay nonnegative (myfunc takes sqrt(x[2]))
lower_bounds!(opt, [-Inf, 0.])
xtol_rel!(opt,1e-4)
min_objective!(opt, myfunc)
# two inequality constraints, each with tolerance 1e-8
inequality_constraint!(opt, (x,g) -> myconstraint(x,g,2,0), 1e-8)
inequality_constraint!(opt, (x,g) -> myconstraint(x,g,-1,1), 1e-8)
(minfunc,minx,ret) = NLopt.optimize(opt, [1.234, 5.678])
println("got $minfunc at $minx after $count iterations (returned $ret)")
# Rosenbrock objective in NLopt form: fill `grad` in-place when a
# gradient-based algorithm requests it, and return the function value
# (the inequality constraint g(x) ≤ 0 is supplied separately below).
function rosenbrockf(x::Vector, grad::Vector)
    if !isempty(grad)
        resid = x[2] - x[1]^2
        grad[1] = -2.0 * (1.0 - x[1]) - 400.0 * resid * x[1]
        grad[2] = 200.0 * resid
    end
    return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
end
# Disk constraint x[1]^2 + x[2]^2 - 0.8 ≤ 0 in NLopt form: the gradient
# (2x) is written into `grad` in-place when requested.
function r_constraint(x::Vector, grad::Vector)
    if !isempty(grad)
        grad[1] = 2 * x[1]
        grad[2] = 2 * x[2]
    end
    return x[1]^2 + x[2]^2 - 0.8
end
# constrained Rosenbrock with MMA: box bounds plus the disk constraint
opt = Opt(:LD_MMA, 2)
lower_bounds!(opt, [-5, -5.0])
min_objective!(opt,(x,g) -> rosenbrockf(x,g))
# NOTE(review): no tolerance argument here (cf. the 1e-8 used above) —
# confirm the intended constraint tolerance.
inequality_constraint!(opt, (x,g) -> r_constraint(x,g))
ftol_rel!(opt,1e-9)
NLopt.optimize(opt, [-1.0,0.0])
# JuMP.jl uses MathProgBase.jl, which converts your problem formulation
# into a standard representation of an optimization problem.
using Clp
m = Model(solver=ClpSolver()) # provide a solver
# Define variables
# NOTE(review): the lines below are illustrative JuMP syntax — `lb`, `ub`,
# `M`, `s[i]`, and `numLocation` are never defined in this file, and `x`
# is redeclared repeatedly, so this cell cannot run as-is.
@variable(m, x ) # No bounds
@variable(m, x >= lb ) # Lower bound only (note: 'lb <= x' is not valid)
@variable(m, x <= ub ) # Upper bound only
@variable(m, lb <= x <= ub ) # Lower and upper bounds
# we can create arrays of a variable
N = 2
@variable(m, x[1:M,1:N] >= 0 )
# or put them in a block
@variables m begin
x
y >= 0
Z[1:10], Bin
X[1:3,1:3], SDP
q[i=1:2], (lowerbound = i, start = 2i, upperbound = 3i)
t[j=1:3], (Int, start = j)
end
# Equivalent to:
@variable(m, x)
@variable(m, y >= 0)
@variable(m, Z[1:10], Bin)
@variable(m, X[1:3,1:3], SDP)
@variable(m, q[i=1:2], lowerbound = i, start = 2i, upperbound = 3i)
@variable(m, t[j=1:3], Int, start = j)
# bounds can depend on indices
@variable(m, x[i=1:10] >= i )
# constraints and objectives follow the same macro style
@constraint(m, x[i] - s[i] <= 0) # Other options: == and >=
@constraint(m, sum(x[i] for i=1:numLocation) == 1)
@objective(m, Max, 5x + 22y + (x+y)/2) # or Min
# generator syntax for sums:
# @objective(sum(x[i] + y[i]/pi for i = I1, j = I2 if i+j < some_val))
## Simple example
# a tiny LP: max 5x + 3y  s.t.  x + 5y ≤ 3,  0 ≤ x ≤ 2,  0 ≤ y ≤ 30
using JuMP
using Clp
m = Model(solver = ClpSolver())
@variable(m, 0 <= x <= 2 )
@variable(m, 0 <= y <= 30 )
@objective(m, Max, 5x + 3*y )
@constraint(m, 1x + 5y <= 3.0 )
print(m)
# solve, then query the optimum and the variable values (old JuMP API)
status = solve(m)
println("Objective value: ", getobjectivevalue(m))
println("x = ", getvalue(x))
println("y = ", getvalue(y))
# JuMP: Rosenbrock Example
# Instead of hand-coding first and second derivatives, you only have to give `JuMP` expressions for objective and constraints.
# Here is an example.
using Ipopt
# the `let` block keeps m, x, y out of global scope
let
m = Model(solver=IpoptSolver())
@variable(m, x)
@variable(m, y)
@NLobjective(m, Min, (1-x)^2 + 100(y-x^2)^2)
solve(m)
println("x = ", getvalue(x), " y = ", getvalue(y))
end
# not bad, right?
# adding the constraint from before:
# same Rosenbrock objective, now with the disk constraint x^2 + y^2 ≤ 0.8
let
m = Model(solver=IpoptSolver())
@variable(m, x)
@variable(m, y)
@NLobjective(m, Min, (1-x)^2 + 100(y-x^2)^2)
@NLconstraint(m,x^2 + y^2 <= 0.8)
solve(m)
println("x = ", getvalue(x), " y = ", getvalue(y))
end
# JuMP example. Copyright 2015, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# example modified
# maximum-likelihood estimation of a Normal's (mu, sigma) with Ipopt:
# draw a sample, then maximize the log-likelihood over mu and sigma and
# compare against the sample mean and variance
using Distributions
distrib = Normal(4.5,3.5)
n = 10000
data = rand(distrib,n);
m = Model(solver=IpoptSolver())
@variable(m, mu, start = 0.0)
@variable(m, sigma >= 0.0, start = 1.0)
# Normal log-likelihood; fixed: `2Ï€` was mojibake for `2π`
@NLobjective(m, Max, -(n/2)*log(2π*sigma^2)-sum((data[i] - mu) ^ 2 for i = 1:n)/(2*sigma^2))
solve(m)
println("μ = ", getvalue(mu),", mean(data) = ", mean(data))
println("σ^2 = ", getvalue(sigma)^2, ", var(data) = ", var(data))
# Lattice of candidate points over contours of x + y, with the circle
# x^2 + y^2 = 4 and selected points marked.
x = -3:0.01:3
# 7x7 coordinate lattice; fixed: `repmat` and `linspace` were removed in
# Julia 1.0 — use `repeat` and `range` instead
dx = repeat(range(-3, 3, length = 7), 1, 7)
contourf(x,x,(x,y)->x+y,color=:blues)
scatter!(dx,dx',legend=false,markercolor=:white)
plot!(x->sqrt(4-x^2),-2,2,c=:white)
plot!(x->-sqrt(4-x^2),-2,2,c=:white)
scatter!([-2,-1,0],[0,-1,-2],c=:red)
scatter!([-sqrt(2)],[-sqrt(2)],c=:red,markershape=:cross,markersize=9)
# Copyright 2017, Iain Dunning, Joey Huchette, Miles Lubin, and contributors
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#############################################################################
# JuMP
# An algebraic modeling langauge for Julia
# See http://github.com/JuliaOpt/JuMP.jl
#############################################################################
# knapsack.jl
#
# Solves a simple knapsack problem:
# max sum(p_j x_j)
# st sum(w_j x_j) <= C
# x binary
#############################################################################
using JuMP, Cbc
# Maximization problem
m = Model(solver=CbcSolver())
# one binary indicator per item: take it or not
@variable(m, x[1:5], Bin)
profit = [ 5, 3, 2, 7, 4 ]
weight = [ 2, 8, 4, 2, 5 ]
capacity = 10
# Objective: maximize profit
# NOTE(review): `dot` needs `using LinearAlgebra` on Julia ≥ 1.0
@objective(m, Max, dot(profit, x))
# Constraint: can carry all
@constraint(m, dot(weight, x) <= capacity)
# Solve problem using MIP solver
status = solve(m)
println("Objective is: ", getobjectivevalue(m))
println("Solution is:")
# print each item's decision and its profit/weight ratio
for i = 1:5
print("x[$i] = ", getvalue(x[i]))
println(", p[$i]/w[$i] = ", profit[i]/weight[i])
end